home *** CD-ROM | disk | FTP | other *** search
- #
- # SimplePatterns.py
- # JunkMatcher
- #
- # Created by Benjamin Han on 2/1/05.
- # Copyright (c) 2005 Benjamin Han. All rights reserved.
- #
-
- # This program is free software; you can redistribute it and/or
- # modify it under the terms of the GNU General Public License
- # as published by the Free Software Foundation; either version 2
- # of the License, or (at your option) any later version.
-
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
-
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
- #!/usr/bin/env python
-
- import string
-
- from consts import *
- from utilities import *
-
-
class SimplePatterns(object):
    """A persistent, named list of regular-expression patterns.

    Attributes
    ----------
    fn:         name of the file the patterns are loaded from and saved to
    numbered:   True iff every pattern is wrapped in a capturing group, so
                that search() can tell which pattern matched (via the match
                object's lastindex)
    theList:    list of dicts, one per pattern, with the keys 'name', 'pat'
                and 'managed' (bool)
    thePattern: the regex produced by joining all patterns with '|', or None
                when there is no pattern at all (missing file or empty list)
    """

    # Case sensitivity used whenever thePattern is rebuilt.  load() stores the
    # value it was called with so later edits keep matching the same way.
    _ignoreCase = True

    def __init__(self, fn, numbered=False):
        """Remember the file name and grouping mode, then load the patterns."""
        self.theList = []
        self.fn = fn
        self.numbered = numbered
        self.load()

    def _recompile(self):
        """Rebuild self.thePattern from self.theList.

        Honors self.numbered (capturing vs. non-capturing groups) and the
        case sensitivity chosen at load time.  An empty list yields None
        instead of the empty regex, which would match every string.
        """
        if not self.theList:
            self.thePattern = None
            return

        if self.numbered:
            wrapper = r'(%s)'
        else:
            wrapper = r'(?:%s)'
        pat = r'|'.join([wrapper % d['pat'] for d in self.theList])

        if self._ignoreCase:
            self.thePattern = re.compile(pat, re.IGNORECASE)
        else:
            self.thePattern = re.compile(pat)

    def load(self, ignoreCase=True):
        """(Re)load all patterns from self.fn and compile them.

        If the file cannot be opened, theList is left empty and thePattern is
        set to None.  ignoreCase selects case-insensitive matching and is
        remembered for later recompilations.
        """
        del self.theList[:]
        self._ignoreCase = ignoreCase
        try:
            f = openFile(self.fn)
        except (IOError, OSError):
            # file might not exist
            # NOTE(review): assumes the openFile helper only raises
            # IOError/OSError for a missing or unreadable file - confirm.
            self.thePattern = None
            return

        try:
            stripped = [raw.strip() for raw in f]
        finally:
            f.close()
        lines = [line for line in stripped if line]

        # SimplePatterns file format (blank lines are skipped): entries come
        # in pairs of lines - first the quoted pattern, then the quoted name
        # followed by a space and a 0/1 "managed" flag.
        # ASSUMPTION: ignore the active state (everything is active)
        # A trailing pattern line without its name line is dropped.
        for i in range(0, len(lines) - 1, 2):
            patLine = lines[i]
            nameLine = lines[i + 1]
            self.theList.append({'pat': patLine[1:-1],
                                 'name': nameLine[1:-3],
                                 'managed': bool(int(nameLine[-1]))})

        # theList is ascendingly sorted on names
        self.theList.sort(key=lambda d: d['name'])
        self._recompile()

    def search(self, s):
        """Search in string s for any pattern in self.thePattern; returns the
        pattern name if one is found, otherwise returns None.

        IMPORTANT: only use this when self.numbered == True!  Without the
        capturing groups the match has no lastindex and a hit raises
        TypeError.  The name is looked up as theList[lastindex - 1], so the
        individual patterns must not contain capturing groups of their own.
        """
        if self.thePattern is not None:
            mo = self.thePattern.search(s)
            if mo:
                return self.theList[mo.lastindex - 1]['name']
        return None

    def match(self, s):
        """Match self.thePattern with a string s; returns True/False if a
        match is/is not found; returns None if there is no pattern to begin
        with."""
        if self.thePattern is not None:
            return self.thePattern.search(s) is not None
        else:
            return None

    def save(self):
        """Write all patterns back to self.fn in the format load() reads."""
        # Build the text before opening the file: opening with 'w' truncates
        # it, so a malformed entry must not cost us the stored patterns.
        text = u'%s' % '\n'.join(['"%s"\n"%s" %d' % (d['pat'], d['name'], int(d['managed']))
                                  for d in self.theList])
        f = openFile(self.fn, 'w')
        try:
            f.write(text + u'\n')
        finally:
            f.close()

    def add(self, name, pattern, managed=False):
        """Append a pattern, recompile and save.

        Returns the index of the added pattern."""
        self.theList.append({'name': name, 'pat': pattern, 'managed': managed})
        self._recompile()
        self.save()
        return len(self.theList) - 1

    def addMany(self, listOfDict):
        """Add all elements from listOfDict - each element is a dict with 3
        keys: 'name', 'pat', and 'managed' (bool)."""
        self.theList.extend(listOfDict)
        self._recompile()
        self.save()

    def remove(self, idx):
        """Delete the pattern at index idx, recompile and save."""
        del self.theList[idx]
        self._recompile()
        self.save()

    def removeMany(self, removeList):
        """Delete the patterns at the indices in removeList, recompile and
        save.

        The deletions are applied one after another, so each index refers to
        the list as it stands after the previous deletions; pass the indices
        in descending order to remove a fixed set of positions.
        """
        for idx in removeList:
            del self.theList[idx]
        self._recompile()
        self.save()

    def update(self, idx, name, pattern):
        """Replace name and pattern of the entry at index idx, recompile and
        save.  The entry's 'managed' flag is reset to False."""
        d = self.theList[idx]
        d['name'] = name
        d['pat'] = pattern
        d['managed'] = False
        self._recompile()
        self.save()
-
-
if __name__ == '__main__':
    # Small manual demo: list the whitelist patterns, then add, look up and
    # remove one pattern.  Note that add() and remove() save to the file.
    # search() reports pattern names through the numbered capturing groups,
    # so the instance has to be created with numbered=True.
    emailAddressPatterns = SimplePatterns('%swhitelist' % CONF_PATH, numbered=True)
    print('\n'.join(['%s: "%s"' % (d['name'], d['pat']) for d in emailAddressPatterns.theList]))
    print('')

    print(emailAddressPatterns.search('someone@sourceforge.net'))
    emailAddressPatterns.add('Sent from CS.CMU', r'(?i)@cs\.cmu\.edu$')
    # add() appends to theList, so the new pattern sits at the last index.
    idx = len(emailAddressPatterns.theList) - 1
    print(emailAddressPatterns.search('someone@cs.cmu.edu'))
    emailAddressPatterns.remove(idx)
    print(emailAddressPatterns.search('someone@cs.cmu.edu'))
-
-